# In this script, I take the collated stomach data set and calculate aggregates (feeding ratio, total weight of prey groups) and predictor variables for diet data, aggregate to get 1 stomach = 1 row per prey type (not prey individual). I also select only the columns I need for model fitting, join environmental covariates and cpue covariates for cod and flounder, and lastly saduria biomass densities.
# Load libraries, install if needed
library(tidyverse)
#> Warning: package 'tidyr' was built under R version 4.0.5
library(readxl)
library(tidylog)
library(RCurl)
library(RColorBrewer)
#> Warning: package 'RColorBrewer' was built under R version 4.0.5
library(patchwork)
library(janitor)
library(forcats)
library(gapminder)
library(viridis)
library(ggridges)
# NOTE: raster masks dplyr::select(); later code calls dplyr::select() explicitly
library(raster)
library(icesDatras)
library(ggalluvial)
library(ggrepel)
library(ncdf4)
library(chron)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
library(quantreg)
#> Warning in .recacheSubclasses(def@className, def, env): undefined subclass
#> "numericVector" of class "Mnumeric"; definition not updated
library(brms)
#> Warning: package 'Rcpp' was built under R version 4.0.5
library(sdmTMB)
library(sp)
# (duplicate library(raster) removed -- already attached above)
library(modelr)
# Use all available cores for brms/Stan sampling
options(mc.cores = parallel::detectCores())
# Source code for map plots
# NOTE(review): absolute, user-specific path -- breaks on any other machine;
# consider here::here() or a project-relative path. TODO confirm project root.
source("/Users/maxlindmark/Dropbox/Max work/R/cod_interactions/R/functions/map_plot.R")
# Load cache
# qwraps2::lazyload_cache_dir(path = "R/prepare_data/04_density_models_for_covars_cache/html")
# theme_plot() is defined in the sourced map_plot.R
theme_set(theme_plot())
# Continuous colors
options(ggplot2.continuous.colour = "viridis")
# Discrete colors
# Override ggplot2's default discrete colour scale with the Dark2 brewer
# palette (extra arguments are accepted but ignored, as ggplot2 requires).
scale_colour_discrete <- function(...) scale_colour_brewer(palette = "Dark2")
# Override ggplot2's default discrete fill scale with the Dark2 brewer
# palette (extra arguments are accepted but ignored, as ggplot2 requires).
scale_fill_discrete <- function(...) scale_fill_brewer(palette = "Dark2")
# Collated stomach-content data: one row per prey item per predator stomach
dat <- read_csv("data/clean/clean_stomach_data.csv")
#> Rows: 5885 Columns: 29
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (7): pred_id, predator_latin_name, species, ices_rect, haul_id, pred_we...
#> dbl (22): pred_weight_g, pred_length_cm, year, quarter, month, day, subdiv, ...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# We know from the data exploration that cod, in contrast to flounder, has a clear ontogenetic diet shift towards pelagic prey.
# Presence/absence of pelagic prey per cod stomach (non-empty stomachs only)
dat_logistic <- dat %>%
filter(tot_feeding_ratio > 0 & species == "Cod") %>%
mutate(fish_presence = ifelse(pelagic_feeding_ratio == 0, 0, 1))
#> filter: removed 3,219 rows (55%), 2,666 rows remaining
#> mutate: new variable 'fish_presence' (double) with 2 unique values and 0% NA
# Quick look: presence of pelagic prey vs. predator length
ggplot(dat_logistic, aes(pred_length_cm, factor(fish_presence))) +
geom_jitter(alpha = 0.5)
# Logistic regression: P(pelagic prey present) as a function of predator length
glm1 <- glm(fish_presence ~ pred_length_cm,
data = dat_logistic,
family = binomial)
coef(glm1)
#> (Intercept) pred_length_cm
#> -4.6362921 0.1211906
a <- coef(glm1)[1]
b <- coef(glm1)[2]
# L50 = length at 50% probability, i.e. where a + b * L = 0 on the logit scale
l50 <- -a/b
l50
#> (Intercept)
#> 38.25622
# Length at which the fitted logistic model predicts probability p.
# cf: coefficient vector c(intercept, slope); p: target probability in (0, 1).
# Solves p = plogis(cf[1] + cf[2] * L) for L.
lrPerc <- function(cf, p) {
  logit_p <- log(p / (1 - p))
  (logit_p - cf[1]) / cf[2]
}
# L20 = length at 20% probability of pelagic prey presence
l20 <- lrPerc(coef(glm1), 0.20)
l20
#> (Intercept)
#> 26.81725
# Prediction grid over the observed length range (modelr::seq_range)
nd <- data.frame(pred_length_cm = seq_range(dat_logistic$pred_length_cm, n = 100))
nd$pred <- predict(glm1, newdata = nd, type = "response")
# Add glm prediction to plot: raw presence/absence (vertically jittered only,
# width = 0), the fitted logistic curve, and segments marking L50 and L20
p1 <- ggplot(dat_logistic, aes(pred_length_cm, fish_presence)) +
geom_jitter(alpha = 0.3, shape = 21, size = 0.7, fill = "white", stroke = 1.1, width = 0, height = 0.4) +
geom_line(data = nd, aes(pred_length_cm, pred), color = "tomato", size = 1.2) +
geom_segment(x = l50, y = -10, xend = l50, yend = 0.5, color = "tomato", size = 1, aes(linetype = "L50")) +
geom_segment(x = l20, y = -10, xend = l20, yend = 0.2, color = "tomato", size = 1, aes(linetype = "L20")) +
labs(y = "Fish presence", x = "Predator length (cm)", linetype = "") +
coord_cartesian(expand = 0) +
scale_y_continuous(breaks = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 80, 5), lim = c(0, 85)) +
theme_plot()
#ggsave("figures/supp/glm.pdf", width = 17, height = 17, units = "cm")
# Filter cod based on what prey they have in stomachs
# cod_pelagic: stomachs where pelagic prey make up >50% of the feeding ratio
cod_pelagic <- dat %>%
filter(species == "Cod" & pelagic_feeding_ratio/tot_feeding_ratio > 0.5)
#> filter: removed 5,021 rows (85%), 864 rows remaining
# cod_pelagic2: stomachs with exclusively pelagic prey
# NOTE(review): cod_pelagic2 is not used further in this chunk -- confirm it is needed
cod_pelagic2 <- dat %>%
filter(species == "Cod" & pelagic_feeding_ratio == tot_feeding_ratio)
#> filter: removed 4,745 rows (81%), 1,140 rows remaining
# Intercept-only quantile regression: 5th percentile (tau = 0.05) of predator
# length among majority-pelagic-feeding cod.
# NOTE(review): the figure saved below is named "l10_..." but tau = 0.05 here
# (5th, not 10th percentile) -- confirm which is intended.
m1 <- rq(pred_length_cm ~ 1, data = cod_pelagic, tau = 0.05)
summary(m1)
#>
#> Call: rq(formula = pred_length_cm ~ 1, tau = 0.05, data = cod_pelagic)
#>
#> tau: [1] 0.05
#>
#> Coefficients:
#> (Intercept)
#> 26
# The intercept IS the estimated length percentile (no covariates in the model)
intercept <- as.vector(m1$coefficients["(Intercept)"])
# Overlaid length distributions: all cod vs. majority-pelagic feeders, with the
# 5th-percentile length (quantile regression intercept) as a dashed line
p2 <- ggplot() +
geom_histogram(data = dat %>% filter(species == "Cod"), aes(pred_length_cm, fill = "All cod"), alpha = 0.5) +
geom_histogram(data = cod_pelagic, aes(pred_length_cm, fill = "Majority pelagic biomass"), alpha = 0.5) +
geom_vline(xintercept = intercept, linetype = 2, color = brewer.pal("Dark2", n = 3)[2], size = 1.2) +
coord_cartesian(expand = 0) +
scale_x_continuous(breaks = seq(0, 80, 5), lim = c(0, 85)) +
labs(fill = "", x = "Predator length (cm)", y = "Count")
#> filter: removed 2,579 rows (44%), 3,306 rows remaining
# Stack the two panels with patchwork (p2 on top of p1)
p2 / p1
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#> Warning: Removed 2 rows containing missing values (geom_bar).
#> Removed 2 rows containing missing values (geom_bar).
ggsave("figures/supp/l10_quant_sizedist.pdf", width = 17, height = 17, units = "cm")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#> Warning: Removed 2 rows containing missing values (geom_bar).
#> Removed 2 rows containing missing values (geom_bar).
# fit2 <- brm(bf(predator_length_cm ~ 1, quantile = 0.1), data = cod_pelagic, family = asym_laplace())
# summary(fit2)
# plot(fit2)
# Downloaded from here: https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Extract raster points: https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable
# Open the netCDF file: monthly-mean sea-floor dissolved oxygen (variable "o2b")
# from the SCOBI reanalysis, 1993-2020 (336 monthly time steps)
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1664182224542.nc")
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1664182224542.nc (NC_FORMAT_CLASSIC):
#>
#> 1 variables (excluding dimension variables):
#> float o2b[longitude,latitude,time]
#> long_name: Sea_floor_Dissolved_Oxygen_Concentration
#> missing_value: NaN
#> standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#> units: mmol m-3
#> _FillValue: NaN
#> _ChunkSizes: 1
#> _ChunkSizes: 523
#> _ChunkSizes: 383
#>
#> 3 dimensions:
#> time Size:336
#> axis: T
#> long_name: Validity time
#> standard_name: time
#> units: days since 1950-01-01 00:00:00
#> calendar: gregorian
#> _ChunkSizes: 512
#> _CoordinateAxisType: Time
#> valid_min: 15721.5
#> valid_max: 25917.5
#> latitude Size:523
#> axis: Y
#> standard_name: latitude
#> long_name: latitude
#> units: degrees_north
#> _CoordinateAxisType: Lat
#> valid_min: 48.49169921875
#> valid_max: 65.8914184570312
#> longitude Size:383
#> standard_name: longitude
#> long_name: longitude
#> units: degrees_east
#> axis: X
#> _CoordinateAxisType: Lon
#> valid_min: 9.01375484466553
#> valid_max: 30.2357654571533
#>
#> 24 global attributes:
#> references: http://www.smhi.se
#> institution: Swedish Meterological and Hydrological Institute
#> history: See source and creation_date attributees
#> Conventions: CF-1.5
#> contact: servicedesk_cmems@mercator-ocean.eu
#> comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#> bullentin_type: reanalysis
#> cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#> title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#> FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#> FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#> FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#> FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#> shallowest_depth: 1.50136542320251
#> deepest_depth: 711.059204101562
#> source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#> file_quality_index: 1
#> creation_date: 2021-11-09 UTC
#> bullentin_date: 20201201
#> start_date: 2020-12-01 UTC
#> stop_date: 2020-12-01 UTC
#> start_time: 00:00 UTC
#> stop_time: 00:00 UTC
#> _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
# Get longitude and latitude
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530
lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836
# Get time (units: days since 1950-01-01, one value per month)
time <- ncvar_get(ncin,"time")
time
#> [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#> [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#> [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#> [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#> [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#> [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#> [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#> [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#> [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#> [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#> [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
#> [325] 25582.5 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0
#> [334] 25856.5 25887.0 25917.5
# Time units attribute, needed below to reconstruct calendar dates
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 336
tunits
#> $hasatt
#> [1] TRUE
#>
#> $value
#> [1] "days since 1950-01-01 00:00:00"
# Get oxygen: 3D array [longitude, latitude, time]
dname <- "o2b"
oxy_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(oxy_array)
#> [1] 383 523 336
# Get global attributes (kept for provenance; not used further below)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
# Convert time: split the time units string ("days since 1950-01-01 00:00:00")
# into year/month/day fields to recover the origin of the time axis
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])
# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))
# Crop the date variable: substr positions pick month and 2-digit year out of
# chron's printed "(mm/dd/yy ...)" form -- fragile if the print format changes
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
# Two-digit year -> four digits (window: 91-99 -> 1900s, else 2000s)
years <- ifelse(years > 90, 1900 + years, 2000 + years)
# Replace netCDF fill values with NA's.
# FIX: this file declares _FillValue = NaN, and `NaN == NaN` evaluates to NA;
# with a length-1 replacement R treats NA indices as "no match", so the
# original `oxy_array[oxy_array == fillvalue$value] <- NA` was a silent no-op.
# Handle NaN fills explicitly (ncvar_get usually converts fills to NA already,
# in which case this is a harmless no-op).
if (is.nan(fillvalue$value)) {
  oxy_array[is.nan(oxy_array)] <- NA
} else {
  oxy_array[oxy_array == fillvalue$value] <- NA
}
# Next, we need to work with the months that correspond to the quarters that we use.
# loop through each time step, and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
months
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2
#> [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3
#> [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4
#> [101] 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5
#> [126] 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6
#> [151] 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7
#> [176] 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8
#> [201] 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9
#> [226] 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10
#> [251] 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11
#> [276] 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
#> [301] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [326] 2 3 4 5 6 7 8 9 10 11 12
# Q1 = Jan-Mar (months 1-3); Q4 = Oct-Dec (months 10-12)
index_keep_q1 <- which(months < 4)
index_keep_q4 <- which(months > 9)
# Subset the oxygen array (and the month/year vectors) to each quarter
oxy_q1 <- oxy_array[, , index_keep_q1]
oxy_q4 <- oxy_array[, , index_keep_q4]
months_keep_q1 <- months[index_keep_q1]
months_keep_q4 <- months[index_keep_q4]
years_keep_q1 <- years[index_keep_q1]
years_keep_q4 <- years[index_keep_q4]
# Now we have an array with data for that quarter
# We need to now calculate the average within a year.
# Get a sequence that takes every third value between 1: number of months (length)
# (each year contributes exactly 3 monthly slices per quarter)
loop_seq_q1 <- seq(1, dim(oxy_q1)[3], by = 3)
loop_seq_q4 <- seq(1, dim(oxy_q4)[3], by = 3)
# Create objects that will hold data
dlist_q1 <- list()
dlist_q4 <- list()
oxy_1 <- c()
oxy_2 <- c()
oxy_3 <- c()
oxy_ave_q1 <- c()
oxy_10 <- c()
oxy_11 <- c()
oxy_12 <- c()
oxy_ave_q4 <- c()
# Now average by quarter. The vector loop_seq_q1 is 1, 4, 7 etc. So first i is 1, 2, 3,
# which is the index we want.
# NOTE(review): assumes oxy_q1 and oxy_q4 have the same number of time slices
# (same set of years, 3 months each) -- TODO confirm for this file.
for(i in loop_seq_q1) { # We can use q1 as looping index, doesn't matter!
oxy_1 <- oxy_q1[, , (i)]
oxy_2 <- oxy_q1[, , (i + 1)]
oxy_3 <- oxy_q1[, , (i + 2)]
oxy_10 <- oxy_q4[, , (i)]
oxy_11 <- oxy_q4[, , (i + 1)]
oxy_12 <- oxy_q4[, , (i + 2)]
# Cell-wise mean of the 3 monthly matrices = quarterly mean for that year
oxy_ave_q1 <- (oxy_1 + oxy_2 + oxy_3) / 3
oxy_ave_q4 <- (oxy_10 + oxy_11 + oxy_12) / 3
list_pos_q1 <- ((i/3) - (1/3)) + 1 # to get index 1:n(years)
list_pos_q4 <- ((i/3) - (1/3)) + 1 # to get index 1:n(years)
dlist_q1[[list_pos_q1]] <- oxy_ave_q1
dlist_q4[[list_pos_q4]] <- oxy_ave_q4
}
# Now name the lists with the year:
# (relies on years_keep_* being in chronological order so unique() matches
# the list positions filled above)
names(dlist_q1) <- unique(years_keep_q1)
names(dlist_q4) <- unique(years_keep_q4)
# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script
# Filter years in the cpue data frame to only have the years I have oxygen for
d_sub_oxy_q1 <- dat %>% filter(quarter == 1) %>% filter(year %in% names(dlist_q1)) %>% droplevels()
#> filter: removed 2,028 rows (34%), 3,857 rows remaining
#> filter: no rows removed
d_sub_oxy_q4 <- dat %>% filter(quarter == 4) %>% filter(year %in% names(dlist_q4)) %>% droplevels()
#> filter: removed 3,857 rows (66%), 2,028 rows remaining
#> filter: no rows removed
# Create data holding object
oxy_data_list_q1 <- list()
oxy_data_list_q4 <- list()
# ... And for the oxygen raster
raster_list_q1 <- list()
raster_list_q4 <- list()
# Create factor year for indexing the list in the loop
d_sub_oxy_q1$year_f <- as.factor(d_sub_oxy_q1$year)
d_sub_oxy_q4$year_f <- as.factor(d_sub_oxy_q4$year)
sort(unique(d_sub_oxy_q1$year_f))
#> [1] 2016 2017 2018 2020
#> Levels: 2016 2017 2018 2020
sort(unique(d_sub_oxy_q4$year_f))
#> [1] 2015 2016 2017 2018 2019
#> Levels: 2015 2016 2017 2018 2019
# We have missing years. Add fake data to not break the loop
# NOTE(review): the placeholder rows are copies of Q4 coordinates relabelled
# as the missing Q1 years (2015, 2019); they produce the one-row slices seen
# in the loop log and should not end up in the final joined covariates --
# confirm they are dropped downstream.
d_sub_oxy_q1_extra <- d_sub_oxy_q4 %>% dplyr::select(year, lat, lon) %>% head(2) %>% mutate(year = c(2015, 2019))
#> mutate: changed one value (50%) of 'year' (0 new NA)
d_sub_oxy_q1 <- bind_rows(d_sub_oxy_q1, d_sub_oxy_q1_extra) %>%
mutate(year_f = as.factor(year))
#> mutate: changed 2 values (<1%) of 'year_f' (2 fewer NA)
d_sub_oxy_q4_extra <- d_sub_oxy_q4 %>% dplyr::select(year, lat, lon) %>% head(1) %>% mutate(year = 2020)
#> mutate: changed one value (100%) of 'year' (0 new NA)
d_sub_oxy_q4 <- bind_rows(d_sub_oxy_q4, d_sub_oxy_q4_extra) %>%
mutate(year_f = as.factor(year))
#> mutate: changed one value (<1%) of 'year_f' (1 fewer NA)
# Loop through each year and extract raster values for the data points
for(i in as.factor(c(2015:2020))) {
# Set plot limits
# NOTE(review): ymin/ymax/xmin/xmax are assigned but never used below --
# presumably left over from an earlier plotting version; confirm before removing
ymin = 54; ymax = 58; xmin = 12; xmax = 22
# Subset a year (i indexes the year-named lists built above)
oxy_slice_q1 <- dlist_q1[[i]]
oxy_slice_q4 <- dlist_q4[[i]]
# Create raster for that year (i); t() because the array is [lon, lat] while
# raster expects rows = y
# NOTE(review): the CRS string contains "+no_defs+ towgs84=..." (misplaced
# space) -- likely intended as "+no_defs +towgs84=..."; verify PROJ accepts it
r_q1 <- raster(t(oxy_slice_q1), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
r_q4 <- raster(t(oxy_slice_q4), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
# Flip: netCDF latitude runs south-to-north, raster rows run top-down
r_q1 <- flip(r_q1, direction = 'y')
r_q4 <- flip(r_q4, direction = 'y')
plot(r_q1, main = paste(i, "Q1"))
plot(r_q4, main = paste(i, "Q4"))
# Filter the same year (i) in the data and select only coordinates
d_slice_q1 <- d_sub_oxy_q1 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
d_slice_q4 <- d_sub_oxy_q4 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
# Make into a SpatialPoints object
data_sp_q1 <- SpatialPoints(d_slice_q1)
data_sp_q4 <- SpatialPoints(d_slice_q4)
# Extract raster value (oxygen)
rasValue_q1 <- raster::extract(r_q1, data_sp_q1)
rasValue_q4 <- raster::extract(r_q4, data_sp_q4)
# Now we want to plot the results of the raster extractions by plotting the cpue
# data points over a raster and saving it for each year.
# Make the SpatialPoints object into a raster again (for plot)
df_q1 <- as.data.frame(data_sp_q1)
df_q4 <- as.data.frame(data_sp_q4)
# Add in the raster value in the df holding the coordinates for the cpue data
d_slice_q1$oxy <- rasValue_q1
d_slice_q4$oxy <- rasValue_q4
# Add in which year
d_slice_q1$year <- i
d_slice_q4$year <- i
# Now the unit of oxygen is mmol/m3. I want it to be ml/L. The original model is in unit ml/L
# and it's been converted by the data host. Since it was converted without accounting for
# pressure or temperature, I can simply use the following conversion factor:
# 1 ml/l = 103/22.391 = 44.661 μmol/l -> 1 ml/l = 0.044661 mmol/l = 44.661 mmol/m^3 -> 0.0223909 ml/l = 1mmol/m^3
# https://ocean.ices.dk/tools/unitconversion.aspx
d_slice_q1$oxy <- d_slice_q1$oxy * 0.0223909
d_slice_q4$oxy <- d_slice_q4$oxy * 0.0223909
# Create a index for the data last where we store all years (because our loop index
# i is not continuous, we can't use it directly)
# NOTE(review): year - 1992 gives list positions 23-28, leaving slots 1-22
# NULL; bind_rows() below silently drops NULL elements, so this works but a
# 1-based index (e.g. year - 2014) would be clearer
index_q1 <- as.numeric(as.character(d_slice_q1$year))[1] - 1992
index_q4 <- as.numeric(as.character(d_slice_q4$year))[1] - 1992
# Add each years' data in the list
oxy_data_list_q1[[index_q1]] <- d_slice_q1
oxy_data_list_q4[[index_q4]] <- d_slice_q4
}
#> filter: removed 3,858 rows (>99%), one row remaining
#> filter: removed 1,509 rows (74%), 520 rows remaining
#> filter: removed 3,093 rows (80%), 766 rows remaining
#> filter: removed 1,818 rows (90%), 211 rows remaining
#> filter: removed 3,084 rows (80%), 775 rows remaining
#> filter: removed 1,483 rows (73%), 546 rows remaining
#> filter: removed 2,759 rows (71%), 1,100 rows remaining
#> filter: removed 1,930 rows (95%), 99 rows remaining
#> filter: removed 3,858 rows (>99%), one row remaining
#> filter: removed 1,377 rows (68%), 652 rows remaining
#> filter: removed 2,643 rows (68%), 1,216 rows remaining
#> filter: removed 2,028 rows (>99%), one row remaining
# Now create a data frame from the list of all annual values
big_dat_oxy_q1 <- dplyr::bind_rows(oxy_data_list_q1)
big_dat_oxy_q4 <- dplyr::bind_rows(oxy_data_list_q4)
# Tag each quarter and stack into one long data frame
big_dat_oxy <- bind_rows(mutate(big_dat_oxy_q1, quarter = 1),
mutate(big_dat_oxy_q4, quarter = 4))
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
# Create an ID for matching the temperature data with the data
# (year_quarter_lon_lat; assumes lon/lat print identically when pasted)
big_dat_oxy$id_env <- paste(big_dat_oxy$year, big_dat_oxy$quarter, big_dat_oxy$lon, big_dat_oxy$lat, sep = "_")
big_dat_oxy <- big_dat_oxy %>% distinct(id_env, .keep_all = TRUE)
# Open the netCDF file: monthly-mean sea-floor potential temperature
# (variable "bottomT") from the NEMO reanalysis, same grid and time axis as
# the oxygen file above
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1664183191233.nc")
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1664183191233.nc (NC_FORMAT_CLASSIC):
#>
#> 1 variables (excluding dimension variables):
#> float bottomT[longitude,latitude,time]
#> standard_name: sea_water_potential_temperature_at_sea_floor
#> units: degrees_C
#> long_name: Sea floor potential temperature
#> missing_value: NaN
#> _FillValue: NaN
#> _ChunkSizes: 1
#> _ChunkSizes: 523
#> _ChunkSizes: 383
#>
#> 3 dimensions:
#> time Size:336
#> axis: T
#> long_name: Validity time
#> standard_name: time
#> units: days since 1950-01-01 00:00:00
#> calendar: gregorian
#> _ChunkSizes: 512
#> _CoordinateAxisType: Time
#> valid_min: 15721.5
#> valid_max: 25917.5
#> latitude Size:523
#> axis: Y
#> standard_name: latitude
#> long_name: latitude
#> units: degrees_north
#> _CoordinateAxisType: Lat
#> valid_min: 48.49169921875
#> valid_max: 65.8914184570312
#> longitude Size:383
#> standard_name: longitude
#> long_name: longitude
#> units: degrees_east
#> axis: X
#> _CoordinateAxisType: Lon
#> valid_min: 9.01375484466553
#> valid_max: 30.2357654571533
#>
#> 24 global attributes:
#> references: http://www.smhi.se
#> institution: Swedish Meterological and Hydrological Institute
#> history: See source and creation_date attributees
#> Conventions: CF-1.5
#> contact: servicedesk_cmems@mercator-ocean.eu
#> comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#> bullentin_type: reanalysis
#> cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#> title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#> FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#> FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#> FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#> FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#> shallowest_depth: 1.50136542320251
#> deepest_depth: 711.059204101562
#> source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#> file_quality_index: 1
#> creation_date: 2021-11-09 UTC
#> bullentin_date: 20201201
#> start_date: 2020-12-01 UTC
#> stop_date: 2020-12-01 UTC
#> start_time: 00:00 UTC
#> stop_time: 00:00 UTC
#> _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
# Get longitude and latitude (same grid as the oxygen file)
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530
lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836
# Get time (units: days since 1950-01-01, one value per month)
time <- ncvar_get(ncin,"time")
time
#> [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#> [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#> [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#> [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#> [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#> [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#> [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#> [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#> [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#> [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#> [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
#> [325] 25582.5 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0
#> [334] 25856.5 25887.0 25917.5
# Time units attribute, needed below to reconstruct calendar dates
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 336
tunits
#> $hasatt
#> [1] TRUE
#>
#> $value
#> [1] "days since 1950-01-01 00:00:00"
# Get temperature: 3D array [longitude, latitude, time]
dname <- "bottomT"
temp_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(temp_array)
#> [1] 383 523 336
# Get global attributes (kept for provenance; not used further below)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
# Convert time: split the time units string ("days since 1950-01-01 00:00:00")
# into year/month/day fields to recover the origin of the time axis
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])
# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))
# Crop the date variable: substr positions pick month and 2-digit year out of
# chron's printed "(mm/dd/yy ...)" form -- fragile if the print format changes
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
# Two-digit year -> four digits (window: 91-99 -> 1900s, else 2000s)
years <- ifelse(years > 90, 1900 + years, 2000 + years)
# Replace netCDF fill values with NA's.
# FIX: _FillValue is NaN here and `NaN == NaN` evaluates to NA; with a
# length-1 replacement R treats NA indices as "no match", so the original
# `temp_array[temp_array == fillvalue$value] <- NA` was a silent no-op.
# Handle NaN fills explicitly (ncvar_get usually converts fills to NA already,
# in which case this is a harmless no-op).
if (is.nan(fillvalue$value)) {
  temp_array[is.nan(temp_array)] <- NA
} else {
  temp_array[temp_array == fillvalue$value] <- NA
}
# Next, we need to work with the months that correspond to the quarters that we use.
# loop through each time step, and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
months
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2
#> [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3
#> [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4
#> [101] 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5
#> [126] 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6
#> [151] 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7
#> [176] 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8
#> [201] 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9
#> [226] 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10
#> [251] 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11
#> [276] 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
#> [301] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [326] 2 3 4 5 6 7 8 9 10 11 12
# Q1 = Jan-Mar (months 1-3); Q4 = Oct-Dec (months 10-12)
index_keep_q1 <- which(months < 4)
index_keep_q4 <- which(months > 9)
# Subset the temperature array (and the month/year vectors) to each quarter
temp_q1 <- temp_array[, , index_keep_q1]
temp_q4 <- temp_array[, , index_keep_q4]
months_keep_q1 <- months[index_keep_q1]
months_keep_q4 <- months[index_keep_q4]
years_keep_q1 <- years[index_keep_q1]
years_keep_q4 <- years[index_keep_q4]
# Now we have an array with data for that quarter
# We need to now calculate the average within a year.
# Get a sequence that takes every third value between 1: number of months (length)
# (each year contributes exactly 3 monthly slices per quarter)
loop_seq_q1 <- seq(1, dim(temp_q1)[3], by = 3)
loop_seq_q4 <- seq(1, dim(temp_q4)[3], by = 3)
# Create objects that will hold data
dlist_q1 <- list()
dlist_q4 <- list()
temp_1 <- c()
temp_2 <- c()
temp_3 <- c()
temp_ave_q1 <- c()
temp_10 <- c()
temp_11 <- c()
temp_12 <- c()
temp_ave_q4 <- c()
# Now average by quarter. The vector loop_seq_q1 is 1, 4, 7 etc. So first i is 1, 2, 3,
# which is the index we want.
# NOTE(review): assumes temp_q1 and temp_q4 have the same number of time slices
# (same set of years, 3 months each) -- TODO confirm for this file.
for(i in loop_seq_q1) {
temp_1 <- temp_q1[, , (i)]
temp_2 <- temp_q1[, , (i + 1)]
temp_3 <- temp_q1[, , (i + 2)]
temp_10 <- temp_q4[, , (i)]
temp_11 <- temp_q4[, , (i + 1)]
temp_12 <- temp_q4[, , (i + 2)]
# Cell-wise mean of the 3 monthly matrices = quarterly mean for that year
temp_ave_q1 <- (temp_1 + temp_2 + temp_3) / 3
temp_ave_q4 <- (temp_10 + temp_11 + temp_12) / 3
list_pos_q1 <- ((i/3) - (1/3)) + 1 # to get index 1:n(years)
list_pos_q4 <- ((i/3) - (1/3)) + 1 # to get index 1:n(years)
dlist_q1[[list_pos_q1]] <- temp_ave_q1
dlist_q4[[list_pos_q4]] <- temp_ave_q4
}
# Now name the lists with the year:
# (relies on years_keep_* being in chronological order so unique() matches
# the list positions filled above)
names(dlist_q1) <- unique(years_keep_q1)
names(dlist_q4) <- unique(years_keep_q4)
# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script
# Filter years in the cpue data frame to only have the years I have temperature for
d_sub_temp_q1 <- dat %>% filter(quarter == 1) %>% filter(year %in% names(dlist_q1)) %>% droplevels()
#> filter: removed 2,028 rows (34%), 3,857 rows remaining
#> filter: no rows removed
d_sub_temp_q4 <- dat %>% filter(quarter == 4) %>% filter(year %in% names(dlist_q4)) %>% droplevels()
#> filter: removed 3,857 rows (66%), 2,028 rows remaining
#> filter: no rows removed
# Create data holding object
temp_data_list_q1 <- list()
temp_data_list_q4 <- list()
# ... And for the temperature raster
raster_list_q1 <- list()
raster_list_q4 <- list()
# Create factor year for indexing the list in the loop
d_sub_temp_q1$year_f <- as.factor(d_sub_temp_q1$year)
d_sub_temp_q4$year_f <- as.factor(d_sub_temp_q4$year)
# We have missing years. Add fake data to not break the loop
# NOTE(review): placeholder rows copied from Q4 and relabelled as the missing
# Q1 years (2015, 2019) / Q4 year (2020); confirm they are dropped downstream
d_sub_temp_q1_extra <- d_sub_temp_q4 %>% dplyr::select(year, lat, lon) %>% head(2) %>% mutate(year = c(2015, 2019))
#> mutate: changed one value (50%) of 'year' (0 new NA)
d_sub_temp_q1 <- bind_rows(d_sub_temp_q1, d_sub_temp_q1_extra) %>%
mutate(year_f = as.factor(year))
#> mutate: changed 2 values (<1%) of 'year_f' (2 fewer NA)
d_sub_temp_q4_extra <- d_sub_temp_q4 %>% dplyr::select(year, lat, lon) %>% head(1) %>% mutate(year = 2020)
#> mutate: changed one value (100%) of 'year' (0 new NA)
d_sub_temp_q4 <- bind_rows(d_sub_temp_q4, d_sub_temp_q4_extra) %>%
mutate(year_f = as.factor(year))
#> mutate: changed one value (<1%) of 'year_f' (1 fewer NA)
# Loop through each year (2015-2020) and extract the raster temperature value
# at every cpue data point. Iterating over a factor in a modern R for-loop
# yields the CHARACTER values ("2015", ..., "2020"), which is what the
# dlist_*[[i]] name-indexing below relies on.
for(i in as.factor(c(2015:2020))) {
# Map/plot limits. NOTE(review): these leak into the global environment and
# are reused by the covariate check plots further down in this script.
ymin = 54; ymax = 58; xmin = 12; xmax = 22
# Pull this year's averaged temperature matrix for each quarter
temp_slice_q1 <- dlist_q1[[i]]
temp_slice_q4 <- dlist_q4[[i]]
# Build a long/lat raster for year i. lon/lat are the NetCDF coordinate
# vectors defined earlier in the script; the matrix is transposed because
# raster() expects rows = y.
r_q1 <- raster(t(temp_slice_q1), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
r_q4 <- raster(t(temp_slice_q4), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
# Flip north-south: the NetCDF latitude axis runs opposite to raster's y axis
r_q1 <- flip(r_q1, direction = 'y')
r_q4 <- flip(r_q4, direction = 'y')
# Quick visual check of each year's raster
plot(r_q1, main = paste(i, "Q1"))
plot(r_q4, main = paste(i, "Q4"))
# Subset the cpue data to the same year (i); keep only the coordinates
d_slice_q1 <- d_sub_temp_q1 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
d_slice_q4 <- d_sub_temp_q4 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
# Make into a SpatialPoints object for raster extraction
data_sp_q1 <- SpatialPoints(d_slice_q1)
data_sp_q4 <- SpatialPoints(d_slice_q4)
# Extract the raster value (temperature) at each cpue point
rasValue_q1 <- raster::extract(r_q1, data_sp_q1)
rasValue_q4 <- raster::extract(r_q4, data_sp_q4)
# Now we want to plot the results of the raster extractions by plotting the
# cpue data points over a raster and saving it for each year.
# Back to plain data frames for plotting
df_q1 <- as.data.frame(data_sp_q1)
df_q4 <- as.data.frame(data_sp_q4)
# Attach the extracted temperature to the coordinate data
d_slice_q1$temp <- rasValue_q1
d_slice_q4$temp <- rasValue_q4
# Record which year these rows belong to (character, from the loop index)
d_slice_q1$year <- i
d_slice_q4$year <- i
# List position for storing this year's result. Because i is not a 1:n
# counter, derive an index from the year instead: 2015 - 1992 = 23, so
# results occupy positions 23:28 and positions 1:22 stay NULL (bind_rows
# drops NULLs later). NOTE(review): the 1992 offset looks inherited from a
# longer year range -- confirm it is intentional.
index_q1 <- as.numeric(d_slice_q1$year)[1] - 1992
index_q4 <- as.numeric(d_slice_q4$year)[1] - 1992
# Store each year's extracted data
temp_data_list_q1[[index_q1]] <- d_slice_q1
temp_data_list_q4[[index_q4]] <- d_slice_q4
# Save to check each year is ok! First convert the raster to points for
# plotting (so that we can use ggplot)
map_q1 <- rasterToPoints(r_q1)
map_q4 <- rasterToPoints(r_q4)
# Make the points a dataframe for ggplot
df_rast_q1 <- data.frame(map_q1)
df_rast_q4 <- data.frame(map_q4)
# Rename the value column and tag the year
df_rast_q1 <- df_rast_q1 %>% rename("temp" = "layer") %>% mutate(year = i)
df_rast_q4 <- df_rast_q4 %>% rename("temp" = "layer") %>% mutate(year = i)
# Store each year's raster data frame
raster_list_q1[[index_q1]] <- df_rast_q1
raster_list_q4[[index_q4]] <- df_rast_q4
}
#> filter: removed 3,858 rows (>99%), one row remaining
#> filter: removed 1,509 rows (74%), 520 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,093 rows (80%), 766 rows remaining
#> filter: removed 1,818 rows (90%), 211 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,084 rows (80%), 775 rows remaining
#> filter: removed 1,483 rows (73%), 546 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 2,759 rows (71%), 1,100 rows remaining
#> filter: removed 1,930 rows (95%), 99 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,858 rows (>99%), one row remaining
#> filter: removed 1,377 rows (68%), 652 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 2,643 rows (68%), 1,216 rows remaining
#> filter: removed 2,028 rows (>99%), one row remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
# Collapse the per-year lists into one data frame per quarter (bind_rows
# drops the NULL positions 1:22 left by the year-offset indexing above), then
# stack the quarters with an explicit quarter column.
big_dat_temp_q1 <- dplyr::bind_rows(temp_data_list_q1)
big_dat_temp_q4 <- dplyr::bind_rows(temp_data_list_q4)
big_dat_temp <- bind_rows(mutate(big_dat_temp_q1, quarter = 1),
                          mutate(big_dat_temp_q4, quarter = 4))
# Create an ID (year_quarter_lon_lat) for matching the temperature data with
# the cpue data
big_dat_temp$id_env <- paste(big_dat_temp$year, big_dat_temp$quarter, big_dat_temp$lon, big_dat_temp$lat, sep = "_")
# Keep one temperature value per unique haul position
big_dat_temp <- big_dat_temp %>% distinct(id_env, .keep_all = TRUE)
# Join temperature onto the oxygen data (big_dat_oxy, built earlier in this
# script) to get a single environmental covariate frame. year is character
# here (it came from the loop index), so convert it back to numeric.
env_dat <- left_join(big_dat_oxy,
                     big_dat_temp %>% dplyr::select(id_env, temp),
                     by = "id_env") %>%
  mutate(year = as.numeric(year))
# Same ID in the stomach data
dat$id_env <- paste(dat$year, dat$quarter, dat$lon, dat$lat, sep = "_")
# Join the environmental covariates onto the stomach data. The key is spelled
# out explicitly (it matches what the previous natural join reported:
# year, quarter, lat, lon, id_env), so that a new column appearing in
# env_dat cannot silently change the join key.
dat <- left_join(dat, env_dat, by = c("year", "quarter", "lat", "lon", "id_env"))
#> left_join: added 2 columns (oxy, temp)
#> > rows only in x 0
#> > rows only in y ( 3)
#> > matched rows 5,885
#> > =======
#> > rows total 5,885
# Sanity maps of the joined covariates: one point per haul, colored by the
# covariate value. Note: xmin/xmax/ymin/ymax are the plot limits left in the
# global environment by the extraction loop above.
# Temperature, faceted by quarter and year
dat %>%
  distinct(haul_id, .keep_all = TRUE) %>%
  ggplot(aes(x = lon, y = lat, color = temp)) +
  geom_point() +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_grid(quarter ~ year) +
  theme(axis.text.x = element_text(angle = 90))
# Oxygen, faceted by year only
dat %>%
  distinct(haul_id, .keep_all = TRUE) %>%
  ggplot(aes(x = lon, y = lat, color = oxy)) +
  geom_point() +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
# Saduria entomon biomass-density raster. The source tif is in a projected
# CRS; reproject to long/lat so it can be sampled at haul coordinates.
saduria <- raster("data/saduria_tif/FWBiomassm_raster_19812019presHighweightcor_no0_newZi.tif")
saduria_longlat <- projectRaster(saduria, crs = ('+proj=longlat'))
# Extract the saduria density at each stomach's position. raster::extract is
# namespaced explicitly because tidyr also exports extract(), so the bare
# name depends on package load order. (Removed a no-op `dat <- dat` here.)
dat$density_saduria <- raster::extract(saduria_longlat, dat %>% dplyr::select(lon, lat))
# Quick visual check of the extracted densities.
# NOTE(review): assumes dat already carries projected coordinates X/Y from
# an earlier step -- confirm.
ggplot(dat, aes(X, Y, color = density_saduria)) +
  geom_point()
# Split the stomach data into the three predator groups used for model
# fitting: small cod (<= 25 cm), large cod (> 25 cm) and flounder.
small_cod_stomach <- dat %>%
  filter(species == "Cod", pred_length_cm <= 25)
large_cod_stomach <- dat %>%
  filter(species == "Cod", pred_length_cm > 25)
flounder_stomach <- dat %>%
  filter(species == "Flounder")
# Write the model-ready data sets
write_csv(small_cod_stomach, "data/clean/small_cod_stomach.csv")
write_csv(large_cod_stomach, "data/clean/large_cod_stomach.csv")
write_csv(flounder_stomach, "data/clean/flounder_stomach.csv")